In [1]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p
from sklearn.preprocessing import normalize, scale, MultiLabelBinarizer
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
In [3]:
# Read the pickled embedding weights from disk.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only point this at weight files you produced yourself.
with open('../src/mane/prototype/embeddings/BC3047.weights', 'rb') as f:
    w = p.load(f)

# Build the graph object from the pickled graph file and its community
# (label) file.
bc = g.graph_from_pickle(
    '../src/mane/data/blogcatalog3.graph',
    '../src/mane/data/blogcatalog3.community',
)
In [4]:
# Average the first two entries of the loaded weights and normalize the
# result in one step (scikit-learn's normalize defaults to unit L2 norm per
# row). Presumably w[0] and w[1] are two embedding matrices of equal shape —
# confirm against the training code.
emb = normalize((w[0] + w[1]) / 2)
In [5]:
# Inspect entry 0 of the un-averaged, un-normalized sum w[0] + w[1].
(w[0] + w[1])[0]
Out[5]:
In [6]:
# Normalize entry 0 of w[0] + w[1] on its own so it can be compared with
# emb[0]. The expression is spelled out instead of using IPython's `_`
# (previous output), which only works when the cells are run in exactly the
# original order. reshape(1, -1) turns the 1-D vector into the single-sample
# 2-D array that normalize expects.
normalize((w[0] + w[1])[0].reshape(1, -1))
Out[6]:
In [7]:
# Inspect entry 0 of the normalized embedding.
emb[0]
Out[7]:
In [9]:
# Split the graph into train/test node ids (x_*) and their label lists (yl_*).
# NOTE(review): 0.5 is presumably the training fraction — confirm against
# graph.get_ids_labels; if the split is random, no seed is set here.
x_train, yl_train, x_test, yl_test = bc.get_ids_labels(0.5)
In [10]:
# Feature vectors for the training nodes: each id in x_train indexes emb.
X_train = [emb[i] for i in x_train]

# Keep the fitted binarizer instead of discarding it: mlb.classes_ maps each
# column of Y_train back to its original label, and mlb.transform(yl_test)
# encodes the test labels with the same column layout.
mlb = MultiLabelBinarizer()
Y_train = mlb.fit_transform(yl_train)
In [11]:
# Dimensions of the binarized training label matrix.
Y_train.shape
Out[11]:
In [12]:
# Print every key of bc._communities whose label collection contains 39.
for node, labels in bc._communities.items():
    if 39 not in labels:
        continue
    print(node)
There are only 8 nodes in community 39. This might cause a problem.
In [15]:
# Labels stored for node 1465 (read from the graph's private _communities
# mapping).
bc._communities[1465]
Out[15]:
In [65]:
# One-vs-rest wrapper around logistic regression for the multi-label target.
# In scikit-learn, C is the inverse regularization strength, so C=1e5 means
# very weak regularization.
lg = OneVsRestClassifier(LogisticRegression(C=1e5))
In [66]:
# Train the classifier on the training embeddings and binarized labels.
lg.fit(X_train, Y_train)
Out[66]:
In [33]:
# Predict the label indicators for embedding 9566; reshape(1, -1) presents
# the vector as a single-sample 2-D input.
lg.predict(emb[9566].reshape(1,-1))
Out[33]:
In [30]:
# Dot product between embeddings 5 and 0. emb was passed through normalize,
# so this is presumably their cosine similarity — assumes row-wise L2 norm.
emb[5].dot(emb[0])
Out[30]:
In [31]:
# First training node id.
x_train[0]
Out[31]:
In [32]:
# Second training node id.
x_train[1]
Out[32]:
In [38]:
# Binarized label row of the training sample at index 8.
Y_train[8]
Out[38]:
In [39]:
# Per-label probabilities predicted for embedding 1234.
lg.predict_proba(emb[1234].reshape(1,-1))
Out[39]:
In [40]:
# Labels stored for node 1234, to compare against the predicted probabilities.
bc._communities[1234]
Out[40]:
In [56]:
# Column indices of the four highest predicted probabilities for embedding
# 1234, in ascending order of probability (argsort sorts ascending).
# NOTE(review): these are column positions of the binarized label matrix,
# which need not equal the community ids — confirm the mapping.
lg.predict_proba(emb[1234].reshape(1,-1)).argsort()[0][-4:]
Out[56]:
In [60]:
# All label column indices for embedding 5437, ordered from least to most
# probable.
lg.predict_proba(emb[5437].reshape(1,-1)).argsort()[0]
Out[60]:
In [58]:
# Labels stored for node 5437.
bc._communities[5437]
Out[58]:
In [61]:
# Print the stored labels of every node yielded by bc[5437]
# (presumably the neighbours of node 5437 — confirm against the graph class).
for neighbour in bc[5437]:
    neighbour_labels = bc._communities[neighbour]
    print(neighbour_labels)
In [62]:
# Whatever the graph stores under key 5437 — presumably the node's adjacency
# list; confirm against the graph class.
bc[5437]
Out[62]:
In [72]:
# Among the nodes yielded by bc[7999] (presumably its neighbours), print the
# ones whose stored labels include 32.
for neighbour in bc[7999]:
    if 32 not in bc._communities[neighbour]:
        continue
    print(neighbour)
In [77]:
# Column index of the most probable label for embedding 6984.
lg.predict_proba(emb[6984].reshape(1,-1))[0].argmax()
Out[77]:
In [78]:
# Print a marker once for each listed node id that is part of the training
# split. The ids are presumably the eight community-39 nodes found earlier —
# confirm against that cell's output.
for node in [14, 691, 1250, 1344, 1465, 1550, 4709, 7759]:
    if node not in x_train:
        continue
    print('la')
In [ ]: